### GED Data prediction

#### Configuration #####
library(dplyr)
library(randomForestSRC)
library(xgboost)
library(MLmetrics)

# Print numbers in fixed notation instead of scientific notation.
options(scipen = 999)
# NOTE(review): machine-specific absolute path; every relative read.csv() /
# write.csv() call in this script resolves against it.
setwd("E:/Fortis/Workspace/Views Competition/Update September 2020")
# Raise the Windows memory cap to 64000 MB. The earlier memory.size(64000)
# call only reported memory use (its sole argument is the logical `max`), so
# it never changed the limit. memory.limit() exists only on Windows and is a
# stub from R 4.2.0 onwards, hence the guard.
if (.Platform$OS.type == "windows" && getRversion() < "4.2.0") {
  invisible(memory.limit(size = 64000))
}


###### Load Data #####
# Read the GED feature table from the working directory.
views.ged <- read.csv(file = "views_ged.csv", header = TRUE)

# Complete cases only: any row containing an NA is dropped.
views.ged <- na.omit(views.ged)

# Unscaled snapshot of the cleaned table (used to undo the scaling step).
backup.ged <- views.ged


## Scaling toggle: run "Scale" before the XGBoost runs, "Reset" before RF ####
## When the file is executed top to bottom, the reset below immediately
## undoes the scaling, leaving views.ged unscaled.

# Scale: pass columns 7 to 48 through scale() and write them back
views.xgb <- views.ged
views.xgb[, 7:48] <- scale(views.xgb[, 7:48])
views.ged <- views.xgb

# Reset: restore the unscaled snapshot
views.ged <- backup.ged


## LEARN - PREDICT SETUP for 3 batches of forecasts: #####


### Task 2: January 2017 - Dec 2019 Forecasts (m_id 445-480) ####



# Every pair below overwrites views.ged.learn.t2 / views.ged.pred.t2, so only
# the pair that was run last is in effect (s-7 when the whole file is run).

# s-1: learn on months 114-443, predict on months 444-479
views.ged.learn.t2 <- views.ged %>% filter(month_id >= 114, month_id <= 443)
views.ged.pred.t2 <- views.ged %>% filter(month_id >= 444, month_id <= 479)


#+#+#

###### NA.OMIT TEST
# s-2: learn on months 114-442, predict on months 443-478
views.ged.learn.t2 <- views.ged %>% filter(month_id >= 114, month_id <= 442)
views.ged.pred.t2 <- views.ged %>% filter(month_id >= 443, month_id <= 478)

#+#+#


# s-3: learn on months 114-441, predict on months 442-477
views.ged.learn.t2 <- views.ged %>% filter(month_id >= 114, month_id <= 441)
views.ged.pred.t2 <- views.ged %>% filter(month_id >= 442, month_id <= 477)

## Ensemble stacking

# RF predictions are taken directly from the RF train object; the prediction
# frame is only needed for XGB.

# s-3 (stacking variant): the prediction frame spans the same months as learn
views.ged.learn.t2 <- views.ged %>% filter(month_id >= 114, month_id <= 441)
views.ged.pred.t2 <- views.ged %>% filter(month_id >= 114, month_id <= 441)

####
# s-3: one pooled window that is partitioned at random below
views.ged.learn.and.pred <- views.ged %>%
  filter(month_id >= 114, month_id <= 441)

# 80 % of the pooled rows go to training, chosen on the s3 outcome.
# No seed is set here, so the partition differs between runs.
Dindex <- caret::createDataPartition(
  y = views.ged.learn.and.pred$ln_ged_best_sb_s3,
  p = .80,
  list = FALSE
)

views.ged.learn.t2 <- views.ged.learn.and.pred[Dindex, ]
views.ged.pred.t2 <- views.ged.learn.and.pred[-Dindex, ]

###

## STACKING V.4.0

########
########
######## TRAINING SET 114 - 405
######## STACKING SET 406 - 441
######## TESTING SET 442 - 477

views.ged.learn.t2 <- views.ged %>% filter(month_id >= 114, month_id <= 405)
views.ged.pred.t2 <- views.ged %>% filter(month_id >= 406, month_id <= 441)




###

# s-4: learn on months 114-440, predict on months 441-476
views.ged.learn.t2 <- views.ged %>% filter(month_id >= 114, month_id <= 440)
views.ged.pred.t2 <- views.ged %>% filter(month_id >= 441, month_id <= 476)

# s-5: learn on months 114-439, predict on months 440-475
views.ged.learn.t2 <- views.ged %>% filter(month_id >= 114, month_id <= 439)
views.ged.pred.t2 <- views.ged %>% filter(month_id >= 440, month_id <= 475)

# s-6: learn on months 114-438, predict on months 439-474
views.ged.learn.t2 <- views.ged %>% filter(month_id >= 114, month_id <= 438)
views.ged.pred.t2 <- views.ged %>% filter(month_id >= 439, month_id <= 474)

# s-7: learn on months 114-437, predict on months 438-473
views.ged.learn.t2 <- views.ged %>% filter(month_id >= 114, month_id <= 437)
views.ged.pred.t2 <- views.ged %>% filter(month_id >= 438, month_id <= 473)

#+#


### Task 3: January 2014 - Dec 2016 Forecasts (m_id 409-444) ####

# Each pair overwrites views.ged.learn.t3 / views.ged.pred.t3; only the pair
# that was run last is in effect (s-7 when the whole file is run).

# s-1: learn on months 114-407, predict on months 408-443
views.ged.learn.t3 <- views.ged %>% filter(month_id >= 114, month_id <= 407)
views.ged.pred.t3 <- views.ged %>% filter(month_id >= 408, month_id <= 443)

# s-2: learn on months 114-406, predict on months 407-442
views.ged.learn.t3 <- views.ged %>% filter(month_id >= 114, month_id <= 406)
views.ged.pred.t3 <- views.ged %>% filter(month_id >= 407, month_id <= 442)

# s-3: learn on months 114-405, predict on months 406-441
views.ged.learn.t3 <- views.ged %>% filter(month_id >= 114, month_id <= 405)
views.ged.pred.t3 <- views.ged %>% filter(month_id >= 406, month_id <= 441)

# s-4: learn on months 114-404, predict on months 405-440
views.ged.learn.t3 <- views.ged %>% filter(month_id >= 114, month_id <= 404)
views.ged.pred.t3 <- views.ged %>% filter(month_id >= 405, month_id <= 440)

# s-5: learn on months 114-403, predict on months 404-439
views.ged.learn.t3 <- views.ged %>% filter(month_id >= 114, month_id <= 403)
views.ged.pred.t3 <- views.ged %>% filter(month_id >= 404, month_id <= 439)

# s-6: learn on months 114-402, predict on months 403-438
views.ged.learn.t3 <- views.ged %>% filter(month_id >= 114, month_id <= 402)
views.ged.pred.t3 <- views.ged %>% filter(month_id >= 403, month_id <= 438)

# s-7: learn on months 114-401, predict on months 402-437
views.ged.learn.t3 <- views.ged %>% filter(month_id >= 114, month_id <= 401)
views.ged.pred.t3 <- views.ged %>% filter(month_id >= 402, month_id <= 437)


#+#

### Task 1: True forecasts Oct 2020 - March 2021 (m_id 490-495) ####
### Each pair overwrites ged.learn.true / ged.pred.true; only the pair that
### was run last is in effect (s7 when the whole file is run).

### s2, prediction of October 2020 (learn only till 486 possible)
ged.learn.true <- views.ged %>% filter(month_id >= 114, month_id <= 486)
ged.pred.true <- views.ged %>% filter(month_id == 488)

### s3, prediction of November 2020 (learn only till 485 possible)
ged.learn.true <- views.ged %>% filter(month_id >= 114, month_id <= 485)
ged.pred.true <- views.ged %>% filter(month_id >= 487, month_id <= 488)

### s4, prediction of December 2020 (learn only till 484 possible)
ged.learn.true <- views.ged %>% filter(month_id >= 114, month_id <= 484)
ged.pred.true <- views.ged %>% filter(month_id >= 486, month_id <= 488)

### s5, prediction of January 2021 (learn only till 483 possible)
ged.learn.true <- views.ged %>% filter(month_id >= 114, month_id <= 483)
ged.pred.true <- views.ged %>% filter(month_id >= 485, month_id <= 488)

### s6, prediction of February 2021 (learn only till 482 possible)
ged.learn.true <- views.ged %>% filter(month_id >= 114, month_id <= 482)
ged.pred.true <- views.ged %>% filter(month_id >= 484, month_id <= 488)

### s7, prediction of March 2021 (learn only till 481 possible)
ged.learn.true <- views.ged %>% filter(month_id >= 114, month_id <= 481)
ged.pred.true <- views.ged %>% filter(month_id >= 483, month_id <= 488)


#+#+#+#+#+#+#
# RF Predictions #####
#+#+#+#+#+#+#

##### RF.01 prediction test ######
# Small 75-tree trial forest for the s2 outcome (predictor columns 7:48 plus
# column 57), followed by a prediction on the hold-out frame.
# NOTE(review): views.ged.learn and views.ged.pred are not created in the
# visible part of this script -- confirm where they come from.
rf.ged.01 <- rfsrc(
  formula = ln_ged_best_sb_s2 ~ .,
  data = views.ged.learn[, c(7:48, 57)],
  ntree = 75,
  na.action = "na.impute"
)

rf.ged.01

prediction.ged.01 <- predict(
  rf.ged.01,
  views.ged.pred,
  na.action = "na.impute"
)

prediction.ged.01
#+#+# #####

#### RF Task 2 - s1-7 Forecasts #####
# Fits one 350-tree random forest per horizon s1..s7 on views.ged.learn.t2
# (predictor columns 7:48 plus the outcome column 56..62 named in the
# formula), then predicts on views.ged.pred.t2.
#
# Ordering fix: the forests are removed only AFTER predict() has used them,
# and the prediction objects are no longer removed here because the
# "Collect Prediction Results" sections further down read them. Previously
# the rm() calls sat between fitting and predicting, so running the script
# top to bottom stopped with "object 'rf.t2.ged.s1' not found".

## Fit (s2 and s3 drop NA rows, the other horizons impute)
rf.t2.ged.s1 <- rfsrc(ln_ged_best_sb_s1 ~ ., views.ged.learn.t2[, c(7:48, 56)],
                      ntree = 350, na.action = "na.impute")

rf.t2.ged.s2 <- rfsrc(ln_ged_best_sb_s2 ~ ., views.ged.learn.t2[, c(7:48, 57)],
                      ntree = 350, na.action = "na.omit")

rf.t2.ged.s3 <- rfsrc(ln_ged_best_sb_s3 ~ ., views.ged.learn.t2[, c(7:48, 58)],
                      ntree = 350, na.action = "na.omit")

rf.t2.ged.s4 <- rfsrc(ln_ged_best_sb_s4 ~ ., views.ged.learn.t2[, c(7:48, 59)],
                      ntree = 350, na.action = "na.impute")

rf.t2.ged.s5 <- rfsrc(ln_ged_best_sb_s5 ~ ., views.ged.learn.t2[, c(7:48, 60)],
                      ntree = 350, na.action = "na.impute")

rf.t2.ged.s6 <- rfsrc(ln_ged_best_sb_s6 ~ ., views.ged.learn.t2[, c(7:48, 61)],
                      ntree = 350, na.action = "na.impute")

rf.t2.ged.s7 <- rfsrc(ln_ged_best_sb_s7 ~ ., views.ged.learn.t2[, c(7:48, 62)],
                      ntree = 350, na.action = "na.impute")

## Model summaries
rf.t2.ged.s1
rf.t2.ged.s2
rf.t2.ged.s3
rf.t2.ged.s4
rf.t2.ged.s5
rf.t2.ged.s6
rf.t2.ged.s7

## Predict Task 2 (s3 and s7 drop NA rows, the other horizons impute)
pr.t2.ged.s1 <- predict(rf.t2.ged.s1, views.ged.pred.t2, na.action = "na.impute")
pr.t2.ged.s2 <- predict(rf.t2.ged.s2, views.ged.pred.t2, na.action = "na.impute")
pr.t2.ged.s3 <- predict(rf.t2.ged.s3, views.ged.pred.t2, na.action = "na.omit")
pr.t2.ged.s4 <- predict(rf.t2.ged.s4, views.ged.pred.t2, na.action = "na.impute")
pr.t2.ged.s5 <- predict(rf.t2.ged.s5, views.ged.pred.t2, na.action = "na.impute")
pr.t2.ged.s6 <- predict(rf.t2.ged.s6, views.ged.pred.t2, na.action = "na.impute")
pr.t2.ged.s7 <- predict(rf.t2.ged.s7, views.ged.pred.t2, na.action = "na.omit")

pr.t2.ged.s1
pr.t2.ged.s2
pr.t2.ged.s3
pr.t2.ged.s4
pr.t2.ged.s5
pr.t2.ged.s6
pr.t2.ged.s7

## Free the fitted forests (s7 is kept, as before)
rm(rf.t2.ged.s1)
rm(rf.t2.ged.s2)
rm(rf.t2.ged.s3)
rm(rf.t2.ged.s4)
rm(rf.t2.ged.s5)
rm(rf.t2.ged.s6)

## Optional clean-up: run only once the predictions have been exported.
# rm(pr.t2.ged.s1)
# rm(pr.t2.ged.s2)
# rm(pr.t2.ged.s3)
# rm(pr.t2.ged.s4)
# rm(pr.t2.ged.s5)
# rm(pr.t2.ged.s6)

#### RF Task 3 - s1-7 Forecasts #####
# Fits one 350-tree random forest per horizon s1..s7 on views.ged.learn.t3
# (predictor columns 7:48 plus the outcome column 56..62 named in the
# formula), then predicts on views.ged.pred.t3. Missing values are imputed
# in both steps.
#
# Ordering fix: the forests used to be removed before they were printed and
# before predict() ran, and the predictions were removed before they were
# printed and collected, so a top-to-bottom run failed with "object not
# found". Forests are now removed after predict(); predictions are kept for
# the "Collect Prediction Results" section further down.

## Fit
rf.t3.ged.s1 <- rfsrc(ln_ged_best_sb_s1 ~ ., views.ged.learn.t3[, c(7:48, 56)],
                      ntree = 350, na.action = "na.impute")

rf.t3.ged.s2 <- rfsrc(ln_ged_best_sb_s2 ~ ., views.ged.learn.t3[, c(7:48, 57)],
                      ntree = 350, na.action = "na.impute")

rf.t3.ged.s3 <- rfsrc(ln_ged_best_sb_s3 ~ ., views.ged.learn.t3[, c(7:48, 58)],
                      ntree = 350, na.action = "na.impute")

rf.t3.ged.s4 <- rfsrc(ln_ged_best_sb_s4 ~ ., views.ged.learn.t3[, c(7:48, 59)],
                      ntree = 350, na.action = "na.impute")

rf.t3.ged.s5 <- rfsrc(ln_ged_best_sb_s5 ~ ., views.ged.learn.t3[, c(7:48, 60)],
                      ntree = 350, na.action = "na.impute")

rf.t3.ged.s6 <- rfsrc(ln_ged_best_sb_s6 ~ ., views.ged.learn.t3[, c(7:48, 61)],
                      ntree = 350, na.action = "na.impute")

rf.t3.ged.s7 <- rfsrc(ln_ged_best_sb_s7 ~ ., views.ged.learn.t3[, c(7:48, 62)],
                      ntree = 350, na.action = "na.impute")

## Model summaries
rf.t3.ged.s1
rf.t3.ged.s2
rf.t3.ged.s3
rf.t3.ged.s4
rf.t3.ged.s5
rf.t3.ged.s6
rf.t3.ged.s7

## Predict Task 3
pr.t3.ged.s1 <- predict(rf.t3.ged.s1, views.ged.pred.t3, na.action = "na.impute")
pr.t3.ged.s2 <- predict(rf.t3.ged.s2, views.ged.pred.t3, na.action = "na.impute")
pr.t3.ged.s3 <- predict(rf.t3.ged.s3, views.ged.pred.t3, na.action = "na.impute")
pr.t3.ged.s4 <- predict(rf.t3.ged.s4, views.ged.pred.t3, na.action = "na.impute")
pr.t3.ged.s5 <- predict(rf.t3.ged.s5, views.ged.pred.t3, na.action = "na.impute")
pr.t3.ged.s6 <- predict(rf.t3.ged.s6, views.ged.pred.t3, na.action = "na.impute")
pr.t3.ged.s7 <- predict(rf.t3.ged.s7, views.ged.pred.t3, na.action = "na.impute")

pr.t3.ged.s1
pr.t3.ged.s2
pr.t3.ged.s3
pr.t3.ged.s4
pr.t3.ged.s5
pr.t3.ged.s6
pr.t3.ged.s7

## Free the fitted forests (s7 is kept, as before)
rm(rf.t3.ged.s1)
rm(rf.t3.ged.s2)
rm(rf.t3.ged.s3)
rm(rf.t3.ged.s4)
rm(rf.t3.ged.s5)
rm(rf.t3.ged.s6)

## Optional clean-up: run only once the predictions have been exported.
# rm(pr.t3.ged.s1)
# rm(pr.t3.ged.s2)
# rm(pr.t3.ged.s3)
# rm(pr.t3.ged.s4)
# rm(pr.t3.ged.s5)
# rm(pr.t3.ged.s6)






#### RF Task 1 - True 2020-21 predictions #####
# Fits one 350-tree random forest per horizon s2..s7 on ged.learn.true
# (predictor columns 7:48 plus the outcome column 57..62 named in the
# formula) and predicts on ged.pred.true, imputing missing values.
# NOTE(review): every horizon here uses whichever ged.learn.true /
# ged.pred.true window was set last -- confirm the window matches the horizon.
#
# Fix: pr.t1.ged.s3 and pr.t1.ged.s4 are no longer removed here, because the
# Task 1 "Collect Prediction Results" section further down reads them.

### True S2 - October 2020
rf.t1.ged.s2 <- rfsrc(ln_ged_best_sb_s2 ~ ., ged.learn.true[, c(7:48, 57)],
                      ntree = 350, na.action = "na.impute")

### True S3 - Oct & Nov 2020
rf.t1.ged.s3 <- rfsrc(ln_ged_best_sb_s3 ~ ., ged.learn.true[, c(7:48, 58)],
                      ntree = 350, na.action = "na.impute")

### True S4 - Oct & Nov & Dec 2020
rf.t1.ged.s4 <- rfsrc(ln_ged_best_sb_s4 ~ ., ged.learn.true[, c(7:48, 59)],
                      ntree = 350, na.action = "na.impute")

### True S5 - Oct & Nov & Dec 2020 & Jan 2021
rf.t1.ged.s5 <- rfsrc(ln_ged_best_sb_s5 ~ ., ged.learn.true[, c(7:48, 60)],
                      ntree = 350, na.action = "na.impute")

### True S6 - Oct & Nov & Dec 2020 & Jan & Feb 2021
rf.t1.ged.s6 <- rfsrc(ln_ged_best_sb_s6 ~ ., ged.learn.true[, c(7:48, 61)],
                      ntree = 350, na.action = "na.impute")

### True S7 - Oct & Nov & Dec 2020 & Jan & Feb & March 2021
rf.t1.ged.s7 <- rfsrc(ln_ged_best_sb_s7 ~ ., ged.learn.true[, c(7:48, 62)],
                      ntree = 350, na.action = "na.impute")

## Model summaries
rf.t1.ged.s2
rf.t1.ged.s3
rf.t1.ged.s4
rf.t1.ged.s5
rf.t1.ged.s6
rf.t1.ged.s7


## Predict Task 1
pr.t1.ged.s2 <- predict(rf.t1.ged.s2, ged.pred.true, na.action = "na.impute")
pr.t1.ged.s3 <- predict(rf.t1.ged.s3, ged.pred.true, na.action = "na.impute")
pr.t1.ged.s4 <- predict(rf.t1.ged.s4, ged.pred.true, na.action = "na.impute")
pr.t1.ged.s5 <- predict(rf.t1.ged.s5, ged.pred.true, na.action = "na.impute")
pr.t1.ged.s6 <- predict(rf.t1.ged.s6, ged.pred.true, na.action = "na.impute")
pr.t1.ged.s7 <- predict(rf.t1.ged.s7, ged.pred.true, na.action = "na.impute")


## Free the forests that were removed before (s3 and s4 only)
rm(rf.t1.ged.s3)
rm(rf.t1.ged.s4)

## Optional clean-up: run only once the predictions have been exported.
# rm(pr.t1.ged.s3)
# rm(pr.t1.ged.s4)


##### XG Boost prediction #####
## Scaling for XGBoost ####
# Drop the learn frame of an earlier run (emits a warning if none exists).
rm(list = "views.learn.xgb")

# Working copy whose columns 7:48 are passed through scale()
views.xgb <- views.ged
views.xgb[, 7:48] <- scale(views.xgb[, 7:48])

views.learn.xgb <- views.xgb %>% filter(month_id >= 114, month_id <= 442)
views.pred.xgb <- views.xgb %>% filter(month_id >= 443, month_id <= 478)
#####


### s2 - month 490
views.learn.xgb <- views.xgb %>% filter(month_id >= 114, month_id <= 486)
views.pred.xgb <- views.xgb %>% filter(month_id == 488)

views.learn.xgb <- na.omit(views.learn.xgb)
views.pred.xgb <- na.omit(views.pred.xgb)
#####

# The three chunks below take the task frames (derived from views.ged, not
# from views.xgb) and overwrite the learn/pred frames set above; the last
# chunk that was run wins.

#### NA omit for Task 1 ####
views.learn.xgb <- na.omit(ged.learn.true)
views.pred.xgb <- ged.pred.true

#### NA omit for Task 2 ####
views.learn.xgb <- na.omit(views.ged.learn.t2)
views.pred.xgb <- na.omit(views.ged.pred.t2)

#### NA omit for Task 3 ####
views.learn.xgb <- na.omit(views.ged.learn.t3)
views.pred.xgb <- views.ged.pred.t3


#####
## Careful: from here on all tasks share ONE pipeline whose objects are
## overwritten on every XGB run.
#### Build the matrices xgboost needs: features and label, train and test
#*'!!!'*# ADJUST S-X Variable! (the label column index 58 below)

# Features: columns 7:48
train.data.xgb <- as.matrix(views.learn.xgb[, 7:48])
test.data.xgb <- as.matrix(views.pred.xgb[, 7:48])
# Label: column 58
train.label.xgb <- as.matrix(views.learn.xgb[, 58])
test.label.xgb <- as.matrix(views.pred.xgb[, 58])



### XGBoost train ####
# All four fits below use the same train.data.xgb / train.label.xgb matrices;
# which task and horizon they represent depends on what was loaded last.
#
# Changes against the earlier version:
#   * objective "reg:linear" is the deprecated name of squared-error
#     regression; the current name "reg:squarederror" is used instead.
#   * `nround` only worked through partial argument matching; the argument is
#     now spelled `nrounds`.
#   * `max.depth` is written as `max_depth`, the parameter's canonical name.
# NOTE(review): subsample and colsample_bytree are below 1 and no set.seed()
# is visible here, so repeated runs will not reproduce the same model.
xgb.001 <- xgboost(
  data = train.data.xgb,
  label = train.label.xgb,
  objective = "reg:squarederror",
  eval_metric = "rmse",
  max_depth = 25,
  eta = 0.01,
  nrounds = 750,
  subsample = 0.5,
  colsample_bytree = 0.5,
  nthread = 3
)

### TASK 1 XGBoost Train ####
xgb.t1 <- xgboost(
  data = train.data.xgb,
  label = train.label.xgb,
  objective = "reg:squarederror",
  eval_metric = "rmse",
  max_depth = 25,
  eta = 0.01,
  nrounds = 950,
  subsample = 0.5,
  colsample_bytree = 0.5,
  nthread = 3
)

### Task 2 XGBoost Train ####
# No nthread is given for this fit (unlike the others), so the package
# default thread count applies.
xgb.t2 <- xgboost(
  data = train.data.xgb,
  label = train.label.xgb,
  objective = "reg:squarederror",
  eval_metric = "rmse",
  max_depth = 25,
  eta = 0.01,
  nrounds = 950,
  subsample = 0.5,
  colsample_bytree = 0.5
)

### Task 3 XGBoost Train ####

xgb.t3 <- xgboost(
  data = train.data.xgb,
  label = train.label.xgb,
  objective = "reg:squarederror",
  eval_metric = "rmse",
  max_depth = 25,
  eta = 0.01,
  nrounds = 950,
  subsample = 0.5,
  colsample_bytree = 0.5,
  nthread = 3
)

#####

# For each fitted booster: extract the feature importance table (written to
# CSV for the task models) and predict on test.data.xgb.
# `reshape = TRUE` replaces the `T` shorthand, which is an ordinary variable
# that can be reassigned.
# NOTE(review): the horizon tags in the importance object and file names
# (s3, s3, s1) are hard-coded -- adjust them to the horizon actually trained.

## Test #####
importance.xgb.001 <- xgb.importance(model = xgb.001)

xgb.pre.001 <- predict(xgb.001, test.data.xgb, reshape = TRUE)
xgb.pre.001.frame <- as.data.frame(xgb.pre.001)
# Keep the observed label next to the prediction for comparison
xgb.pre.001.frame$real <- test.label.xgb



## Task 1 #####
importance.xgb.t1.s3 <- xgb.importance(model = xgb.t1)
write.csv(importance.xgb.t1.s3, file = "importance_task1_s3_ged.csv", row.names = FALSE)

xgb.pre.t1 <- predict(xgb.t1, test.data.xgb, reshape = TRUE)
xgb.pre.t1.frame <- as.data.frame(xgb.pre.t1)


## Task 2 #####
importance.xgb.t2.s3 <- xgb.importance(model = xgb.t2)
write.csv(importance.xgb.t2.s3, file = "importance_task2_s3_ged.csv", row.names = FALSE)

xgb.pre.t2 <- predict(xgb.t2, test.data.xgb, reshape = TRUE)
xgb.pre.t2.frame <- as.data.frame(xgb.pre.t2)


## Task 3 #####
importance.xgb.t3.s1 <- xgb.importance(model = xgb.t3)
write.csv(importance.xgb.t3.s1, file = "importance_task3_s1_ged.csv", row.names = FALSE)

xgb.pre.t3 <- predict(xgb.t3, test.data.xgb, reshape = TRUE)
xgb.pre.t3.frame <- as.data.frame(xgb.pre.t3)






#####

#### Collect Prediction Results in Dataframe #####
# Puts the RF and XGB predictions for Task 2 / horizon s3 side by side,
# adds identifiers and the observed outcome, reports MSEs and writes a CSV.

Pred.Ensemble.GED <- as.data.frame(pr.t2.ged.s3[["predicted"]])
colnames(Pred.Ensemble.GED) <- c("RF.GED.t2.s3")

Pred.Ensemble.GED$XGB.GED.t2.s3 <- xgb.pre.t2.frame$xgb.pre.t2


# RF.pred.compare2$real.sb <- views.pred.small$ln_ged_best_sb

# Identifiers from the prediction frame; month_id is shifted by 3
Pred.Ensemble.GED$month_id <- views.ged.pred.t2$month_id + 3
Pred.Ensemble.GED$country_id <- views.ged.pred.t2$country_id
Pred.Ensemble.GED$country_name <- views.ged.pred.t2$country_name
# Outcome values stored in the RF prediction object
Pred.Ensemble.GED$real <- pr.t2.ged.s3[["yvar"]]

MSE(Pred.Ensemble.GED$RF.GED.t2.s3, Pred.Ensemble.GED$real)
MSE(Pred.Ensemble.GED$XGB.GED.t2.s3, Pred.Ensemble.GED$real)

write.csv(Pred.Ensemble.GED, file = "Ensemble_Stacking_V4_Part1.csv", row.names = FALSE)


# The checks below used to reference columns RF.01 / XGBoost.01, which are
# never created in this frame (so `$` returned NULL). They now use the
# columns that actually exist.

## MSE test
MSE(Pred.Ensemble.GED$RF.GED.t2.s3, Pred.Ensemble.GED$real)
MSE(Pred.Ensemble.GED$XGB.GED.t2.s3, Pred.Ensemble.GED$real)

## sync test
MSE(Pred.Ensemble.GED$XGB.GED.t2.s3, Pred.Ensemble.GED$RF.GED.t2.s3)

#####


#####################
#####################
#####################
#######
#######
#####################
#####################
#####################
       ##############
       ##############
       ##############
#####################
#####################
#####################

# NA-handling comparison for Task 2: attach RF predictions to copies of the
# prediction frame and compare them, overall and for rows with in_africa == 1.
# NOTE(review): the "without.Imp" and "with.Imp" columns are filled from the
# same object; presumably the forest is refitted with a different na.action
# between the two assignments -- confirm.
test <- views.ged.pred.t2
test2 <- views.ged.pred.t2


test$pr.s2.without.Imp <- pr.t2.ged.s2[["predicted"]]
test$pr.s2.with.Imp <- pr.t2.ged.s2[["predicted"]]

test2$pr.s7.without.Imp <- pr.t2.ged.s7[["predicted"]]
test2$pr.s7.with.Imp <- pr.t2.ged.s7[["predicted"]]


# test.africa was used below without ever being created; build it the same
# way as test.africa2.
test.africa <- filter(test, in_africa == 1)
test.africa2 <- filter(test2, in_africa == 1)

MSE(test.africa$ln_ged_best_sb_s2, test.africa$pr.s2.without.Imp)
MSE(test.africa2$ln_ged_best_sb_s7, test.africa2$pr.s7.without.Imp)

MSE(test2$pr.s7.with.Imp, test2$pr.s7.without.Imp)

write.csv(test2, file = "NA_Testing_T2_S7.csv", row.names = FALSE)
























#### Collect Prediction Results in Dataframe for Task 2: 2017-2019 predictions #####






















# One two-column frame (RF, XGB) per horizon, each written to its own CSV.
# Every horizon reads the same xgb.pre.t2.frame, i.e. whatever XGB run was
# executed last; rerun the XGB pipeline for the matching horizon first.

## s-1
Pred.Ensemble.Task2.s1 <- setNames(as.data.frame(pr.t2.ged.s1$predicted),
                                   "RF.GED.t2.s1")
Pred.Ensemble.Task2.s1[["XGB.GED.t2.s1"]] <- xgb.pre.t2.frame[["xgb.pre.t2"]]
write.csv(Pred.Ensemble.Task2.s1, file = "Pred_Ensemble_Task2_s1.csv",
          row.names = FALSE)


## s-2
Pred.Ensemble.Task2.s2 <- setNames(as.data.frame(pr.t2.ged.s2$predicted),
                                   "RF.GED.t2.s2")
Pred.Ensemble.Task2.s2[["XGB.GED.t2.s2"]] <- xgb.pre.t2.frame[["xgb.pre.t2"]]
write.csv(Pred.Ensemble.Task2.s2, file = "Pred_Ensemble_Task2_s2.csv",
          row.names = FALSE)


## s-3
Pred.Ensemble.Task2.s3 <- setNames(as.data.frame(pr.t2.ged.s3$predicted),
                                   "RF.GED.t2.s3")
Pred.Ensemble.Task2.s3[["XGB.GED.t2.s3"]] <- xgb.pre.t2.frame[["xgb.pre.t2"]]
write.csv(Pred.Ensemble.Task2.s3, file = "Pred_Ensemble_Task2_s3.csv",
          row.names = FALSE)


## s-4
Pred.Ensemble.Task2.s4 <- setNames(as.data.frame(pr.t2.ged.s4$predicted),
                                   "RF.GED.t2.s4")
Pred.Ensemble.Task2.s4[["XGB.GED.t2.s4"]] <- xgb.pre.t2.frame[["xgb.pre.t2"]]
write.csv(Pred.Ensemble.Task2.s4, file = "Pred_Ensemble_Task2_s4.csv",
          row.names = FALSE)


## s-5
Pred.Ensemble.Task2.s5 <- setNames(as.data.frame(pr.t2.ged.s5$predicted),
                                   "RF.GED.t2.s5")
Pred.Ensemble.Task2.s5[["XGB.GED.t2.s5"]] <- xgb.pre.t2.frame[["xgb.pre.t2"]]
write.csv(Pred.Ensemble.Task2.s5, file = "Pred_Ensemble_Task2_s5.csv",
          row.names = FALSE)


## s-6
Pred.Ensemble.Task2.s6 <- setNames(as.data.frame(pr.t2.ged.s6$predicted),
                                   "RF.GED.t2.s6")
Pred.Ensemble.Task2.s6[["XGB.GED.t2.s6"]] <- xgb.pre.t2.frame[["xgb.pre.t2"]]
write.csv(Pred.Ensemble.Task2.s6, file = "Pred_Ensemble_Task2_s6.csv",
          row.names = FALSE)


## s-7
Pred.Ensemble.Task2.s7 <- setNames(as.data.frame(pr.t2.ged.s7$predicted),
                                   "RF.GED.t2.s7")
Pred.Ensemble.Task2.s7[["XGB.GED.t2.s7"]] <- xgb.pre.t2.frame[["xgb.pre.t2"]]
write.csv(Pred.Ensemble.Task2.s7, file = "Pred_Ensemble_Task2_s7.csv",
          row.names = FALSE)



## Check MSE between XGB & RF - adjust the horizon in all (4x) sX places
with(Pred.Ensemble.Task2.s6, MSE(XGB.GED.t2.s6, RF.GED.t2.s6))
#+#


#### Collect Prediction Results in Dataframe for Task 1: 2020-21 predictions #####

# One frame per horizon s3..s7: RF and XGB predictions plus identifiers taken
# from ged.pred.true, with month_id shifted by the horizon. Each frame is
# written to its own CSV. Every horizon reads the same xgb.pre.t1.frame and
# the same ged.pred.true, i.e. whatever was set or run last.

## s-3
Pred.Ensemble.Task1.s3 <- setNames(as.data.frame(pr.t1.ged.s3$predicted),
                                   "RF.GED.t1.s3")
Pred.Ensemble.Task1.s3[["XGB.GED.t1.s3"]] <- xgb.pre.t1.frame[["xgb.pre.t1"]]
Pred.Ensemble.Task1.s3[["month_id"]] <- ged.pred.true[["month_id"]] + 3
Pred.Ensemble.Task1.s3[["country_id"]] <- ged.pred.true[["country_id"]]
Pred.Ensemble.Task1.s3[["country_name"]] <- ged.pred.true[["country_name"]]
write.csv(Pred.Ensemble.Task1.s3, file = "Pred_Ensemble_Task1_s3.csv",
          row.names = FALSE)


## s-4
Pred.Ensemble.Task1.s4 <- setNames(as.data.frame(pr.t1.ged.s4$predicted),
                                   "RF.GED.t1.s4")
Pred.Ensemble.Task1.s4[["XGB.GED.t1.s4"]] <- xgb.pre.t1.frame[["xgb.pre.t1"]]
Pred.Ensemble.Task1.s4[["month_id"]] <- ged.pred.true[["month_id"]] + 4
Pred.Ensemble.Task1.s4[["country_id"]] <- ged.pred.true[["country_id"]]
Pred.Ensemble.Task1.s4[["country_name"]] <- ged.pred.true[["country_name"]]
write.csv(Pred.Ensemble.Task1.s4, file = "Pred_Ensemble_Task1_s4.csv",
          row.names = FALSE)


## s-5
Pred.Ensemble.Task1.s5 <- setNames(as.data.frame(pr.t1.ged.s5$predicted),
                                   "RF.GED.t1.s5")
Pred.Ensemble.Task1.s5[["XGB.GED.t1.s5"]] <- xgb.pre.t1.frame[["xgb.pre.t1"]]
Pred.Ensemble.Task1.s5[["month_id"]] <- ged.pred.true[["month_id"]] + 5
Pred.Ensemble.Task1.s5[["country_id"]] <- ged.pred.true[["country_id"]]
Pred.Ensemble.Task1.s5[["country_name"]] <- ged.pred.true[["country_name"]]
write.csv(Pred.Ensemble.Task1.s5, file = "Pred_Ensemble_Task1_s5.csv",
          row.names = FALSE)


## s-6
Pred.Ensemble.Task1.s6 <- setNames(as.data.frame(pr.t1.ged.s6$predicted),
                                   "RF.GED.t1.s6")
Pred.Ensemble.Task1.s6[["XGB.GED.t1.s6"]] <- xgb.pre.t1.frame[["xgb.pre.t1"]]
Pred.Ensemble.Task1.s6[["month_id"]] <- ged.pred.true[["month_id"]] + 6
Pred.Ensemble.Task1.s6[["country_id"]] <- ged.pred.true[["country_id"]]
Pred.Ensemble.Task1.s6[["country_name"]] <- ged.pred.true[["country_name"]]
write.csv(Pred.Ensemble.Task1.s6, file = "Pred_Ensemble_Task1_s6.csv",
          row.names = FALSE)


## s-7
Pred.Ensemble.Task1.s7 <- setNames(as.data.frame(pr.t1.ged.s7$predicted),
                                   "RF.GED.t1.s7")
Pred.Ensemble.Task1.s7[["XGB.GED.t1.s7"]] <- xgb.pre.t1.frame[["xgb.pre.t1"]]
Pred.Ensemble.Task1.s7[["month_id"]] <- ged.pred.true[["month_id"]] + 7
Pred.Ensemble.Task1.s7[["country_id"]] <- ged.pred.true[["country_id"]]
Pred.Ensemble.Task1.s7[["country_name"]] <- ged.pred.true[["country_name"]]
write.csv(Pred.Ensemble.Task1.s7, file = "Pred_Ensemble_Task1_s7.csv",
          row.names = FALSE)



## old setup s-2
# Legacy collection of the October 2020 (s2) predictions.
# NOTE(review): pred.s2.true, xgb.pre.s2.true.frame, ged.pred.s2.monthid.490
# and Pred.Ensemble.November2020 are not created in the visible part of this
# script -- confirm they still exist before running this chunk.
Pred.Ensemble.October2020 <- as.data.frame(pred.s2.true[["predicted"]])
colnames(Pred.Ensemble.October2020) <- c("RF.GED.s2.true")

Pred.Ensemble.October2020$XGBoost.GED.s2.true <- xgb.pre.s2.true.frame$xgb.pre.s2.true

# The CSV is written before the identifier columns below are attached.
write.csv(Pred.Ensemble.October2020, file = "Pred_Ensemble_Oct2020.csv", row.names = FALSE)


####
Pred.Ensemble.October2020$month_id <- ged.pred.s2.monthid.490$month_id + 2
Pred.Ensemble.October2020$country_id <- ged.pred.s2.monthid.490$country_id
Pred.Ensemble.October2020$country_name <- ged.pred.s2.monthid.490$country_name

# reorder by column index
# NOTE(review): `data` is not assigned anywhere in the visible script; this
# looks like a template line -- replace `data` with the intended data frame.
data <- data[c(1,3,2)]

####
# Fixed: the comparison needs `==`; `month_id=490` is a named argument, which
# filter() rejects.
obj <- filter(Pred.Ensemble.November2020, month_id == 490)
# left_join obj

# Fixed: the first call referenced columns RF.GED.s2.monthid.490 /
# XGBoost.GED.s2.monthid.490, which are never created; the frame's columns
# are RF.GED.s2.true and XGBoost.GED.s2.true.
MSE(Pred.Ensemble.October2020$RF.GED.s2.true, Pred.Ensemble.October2020$XGBoost.GED.s2.true)
MSE(Pred.Ensemble.Task1.s3$XGB.GED.t1.s3, Pred.Ensemble.Task1.s3$RF.GED.t1.s3)
MSE(Pred.Ensemble.Task1.s7$XGB.GED.t1.s7, Pred.Ensemble.Task1.s7$RF.GED.t1.s7)


#### Collect Prediction Results in Dataframe for Task 3: 2014-2016 predictions #####

# One frame per horizon s1..s7: RF and XGB predictions plus identifiers taken
# from views.ged.pred.t3, with month_id shifted by the horizon. Each frame is
# written to its own CSV. Every horizon reads the same xgb.pre.t3.frame and
# the same views.ged.pred.t3, i.e. whatever was set or run last.

## s-1
Pred.Ensemble.Task3.s1 <- setNames(as.data.frame(pr.t3.ged.s1$predicted),
                                   "RF.GED.t3.s1")
Pred.Ensemble.Task3.s1[["XGB.GED.t3.s1"]] <- xgb.pre.t3.frame[["xgb.pre.t3"]]
Pred.Ensemble.Task3.s1[["month_id"]] <- views.ged.pred.t3[["month_id"]] + 1
Pred.Ensemble.Task3.s1[["country_id"]] <- views.ged.pred.t3[["country_id"]]
Pred.Ensemble.Task3.s1[["country_name"]] <- views.ged.pred.t3[["country_name"]]
write.csv(Pred.Ensemble.Task3.s1, file = "Pred_Ensemble_Task3_s1.csv",
          row.names = FALSE)


## s-2
Pred.Ensemble.Task3.s2 <- setNames(as.data.frame(pr.t3.ged.s2$predicted),
                                   "RF.GED.t3.s2")
Pred.Ensemble.Task3.s2[["XGB.GED.t3.s2"]] <- xgb.pre.t3.frame[["xgb.pre.t3"]]
Pred.Ensemble.Task3.s2[["month_id"]] <- views.ged.pred.t3[["month_id"]] + 2
Pred.Ensemble.Task3.s2[["country_id"]] <- views.ged.pred.t3[["country_id"]]
Pred.Ensemble.Task3.s2[["country_name"]] <- views.ged.pred.t3[["country_name"]]
write.csv(Pred.Ensemble.Task3.s2, file = "Pred_Ensemble_Task3_s2.csv",
          row.names = FALSE)


## s-3
Pred.Ensemble.Task3.s3 <- setNames(as.data.frame(pr.t3.ged.s3$predicted),
                                   "RF.GED.t3.s3")
Pred.Ensemble.Task3.s3[["XGB.GED.t3.s3"]] <- xgb.pre.t3.frame[["xgb.pre.t3"]]
Pred.Ensemble.Task3.s3[["month_id"]] <- views.ged.pred.t3[["month_id"]] + 3
Pred.Ensemble.Task3.s3[["country_id"]] <- views.ged.pred.t3[["country_id"]]
Pred.Ensemble.Task3.s3[["country_name"]] <- views.ged.pred.t3[["country_name"]]
write.csv(Pred.Ensemble.Task3.s3, file = "Pred_Ensemble_Task3_s3.csv",
          row.names = FALSE)

## s-4
Pred.Ensemble.Task3.s4 <- setNames(as.data.frame(pr.t3.ged.s4$predicted),
                                   "RF.GED.t3.s4")
Pred.Ensemble.Task3.s4[["XGB.GED.t3.s4"]] <- xgb.pre.t3.frame[["xgb.pre.t3"]]
Pred.Ensemble.Task3.s4[["month_id"]] <- views.ged.pred.t3[["month_id"]] + 4
Pred.Ensemble.Task3.s4[["country_id"]] <- views.ged.pred.t3[["country_id"]]
Pred.Ensemble.Task3.s4[["country_name"]] <- views.ged.pred.t3[["country_name"]]
write.csv(Pred.Ensemble.Task3.s4, file = "Pred_Ensemble_Task3_s4.csv",
          row.names = FALSE)



## s-5
Pred.Ensemble.Task3.s5 <- setNames(as.data.frame(pr.t3.ged.s5$predicted),
                                   "RF.GED.t3.s5")
Pred.Ensemble.Task3.s5[["XGB.GED.t3.s5"]] <- xgb.pre.t3.frame[["xgb.pre.t3"]]
Pred.Ensemble.Task3.s5[["month_id"]] <- views.ged.pred.t3[["month_id"]] + 5
Pred.Ensemble.Task3.s5[["country_id"]] <- views.ged.pred.t3[["country_id"]]
Pred.Ensemble.Task3.s5[["country_name"]] <- views.ged.pred.t3[["country_name"]]
write.csv(Pred.Ensemble.Task3.s5, file = "Pred_Ensemble_Task3_s5.csv",
          row.names = FALSE)



## s-6
Pred.Ensemble.Task3.s6 <- setNames(as.data.frame(pr.t3.ged.s6$predicted),
                                   "RF.GED.t3.s6")
Pred.Ensemble.Task3.s6[["XGB.GED.t3.s6"]] <- xgb.pre.t3.frame[["xgb.pre.t3"]]
Pred.Ensemble.Task3.s6[["month_id"]] <- views.ged.pred.t3[["month_id"]] + 6
Pred.Ensemble.Task3.s6[["country_id"]] <- views.ged.pred.t3[["country_id"]]
Pred.Ensemble.Task3.s6[["country_name"]] <- views.ged.pred.t3[["country_name"]]
write.csv(Pred.Ensemble.Task3.s6, file = "Pred_Ensemble_Task3_s6.csv",
          row.names = FALSE)


## s-7
Pred.Ensemble.Task3.s7 <- setNames(as.data.frame(pr.t3.ged.s7$predicted),
                                   "RF.GED.t3.s7")
Pred.Ensemble.Task3.s7[["XGB.GED.t3.s7"]] <- xgb.pre.t3.frame[["xgb.pre.t3"]]
Pred.Ensemble.Task3.s7[["month_id"]] <- views.ged.pred.t3[["month_id"]] + 7
Pred.Ensemble.Task3.s7[["country_id"]] <- views.ged.pred.t3[["country_id"]]
Pred.Ensemble.Task3.s7[["country_name"]] <- views.ged.pred.t3[["country_name"]]
write.csv(Pred.Ensemble.Task3.s7, file = "Pred_Ensemble_Task3_s7.csv",
          row.names = FALSE)

